# Chapter 4
library(SensoMineR)

experts <- read.csv("data/sensory/perfumes_qda_experts.csv")
experts$Session <- as.factor(experts$Session)
names(experts)
options(contrasts=c("contr.sum","contr.sum"))
Attributes=names(experts)[5:ncol(experts)]
adjmean=NULL
for (i in Attributes) {
  tmp=coef(lm(experts[,i]~(Product+Panelist+Session)^2-1,data=experts))[1:12]
  adjmean=cbind(adjmean,tmp)
}

colnames(adjmean)=Attributes
rownames(adjmean)=sort(experts[,"Product"][1:12])

round(adjmean,3)

library(FactoMineR)
resPCA <- PCA(adjmean)
names(resPCA)
resPCA$eig
resPCA$ind$cos2
resPCA$var$cor
dimdesc(resPCA)$Dim.1$quanti


library("factoextra")
fviz_screeplot(resPCA)
dev.new();fviz_eig(resPCA,choice = c("variance", "eigenvalue")[1])

#Graph of individuals. Individuals with a similar profile are grouped together.
resPCA$ind$coord=round(resPCA$ind$coord,4)
resPCA$ind$coord
dev.new();fviz_pca_ind(resPCA,
                       col.ind = "cos2", # Color by the quality of representation
                       gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                       repel = TRUE     # Avoid text overlapping
)

dev.new();plot.PCA(resPCA,choix="ind",select="contrib 5",col.ind="blue")
dev.new();plot.PCA(resPCA,choix="ind",axes=c(2,3))

#Graph of variables. Positive correlated variables point to the same side of the plot. Negative correlated variables point to opposite sides of the graph.
resPCA$var$coord=round(resPCA$var$coord,4)
resPCA$var$coord
dev.new();fviz_pca_var(resPCA,
                       col.var = "contrib", # Color by contributions to the PC
                       gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                       repel = TRUE     # Avoid text overlapping
)
dev.new();plot.PCA(resPCA,choix="var",select="contrib 6",col.var="purple")


#Biplot of individuals and variables
dev.new();fviz_pca_biplot(resPCA,
                          col.var = "#2E9FDF", # Variables color
                          col.ind = "#696969",  # Individuals color
                          repel = TRUE
)




# Cluster

##=======Case 1. Hierarchical Clustering
E.dist <- dist(adjmean, method="euclidean") # Euclidean distance
hCluster <- hclust(E.dist, method="ward.D2")
dev.new();plot(hCluster, xlab="Euclidean distance")


dev.new()
par(mfrow=c(1,2)) 
# Apply Euclidean distance to grouping work
E.dist <- dist(adjmean, method="euclidean") # Euclidean distance
h.E.cluster <- hclust(E.dist)
dev.new();plot(h.E.cluster, xlab="Euclidean distance")

# Apply Manhattan distance to grouping work
M.dist <- dist(adjmean, method="manhattan") # Manhattan distance
h.M.cluster <- hclust(M.dist) 
plot(h.M.cluster, xlab="Manhattan")
par(mfrow=c(1,1))


library(FactoMineR)
resHCPC <- HCPC(as.data.frame(adjmean), 
                metric="euclidean", 
                method="ward", 
                nb.clust=-1,
                min=4, 
                graph=FALSE)
names(resHCPC)
dev.new();plot(resHCPC,choice="tree")
dev.new();plot(resHCPC,choice="bar")
dev.new();plot(resHCPC,choice="3D.map")

dev.new();plot(resHCPC$call$t$tree)

resHCPC$desc.var$quanti



##=======Case 2. Kmeans Partitioning Clustering
# Method 1. kmeans
kmeans.cluster <- kmeans(adjmean, centers=4) 

# variance of withinss
kmeans.cluster$withinss

# Labels of clusters
kmeans.cluster$cluster

# Visualize k-means results(ggplot2)
require(factoextra)
dev.new();fviz_cluster(kmeans.cluster,           # results
                       data = adjmean,           # raw data
                       geom = c("point","text"), # point & label
                       ellipse.type = "norm")      # type of frame

# Method 2. K-Medoid needs function pam(), which is in package cluster
require(cluster)

# pam = Partitioning Around Medoids
kmedoid.cluster <- pam(adjmean, k=4) 

# Dissimilarities
kmedoid.cluster$diss

# Labels of clusters
kmedoid.cluster$cluster


# Visualize k-Medoid results(ggplot2)
require(factoextra)
dev.new();fviz_cluster(kmedoid.cluster,       # results
                       data = adjmean,        # raw data
                       geom = c("point","text"),     # point
                       ellipse.type = "norm")   # type of frame







library(SensoMineR)
experts <- read.csv("data/sensory/perfumes_qda_experts.csv")
res <- panellipse(experts, col.p = 4, col.j = 1, firstvar = 5)
dev.new();plot(res$graph$plotInd)
dev.new();plot(res$graph$plotVar)
dev.new();plot(res$graph$plotIndEll)
dev.new();plot(res$graph$plotPan)
dev.new();plot(res$graph$plotVarVariab)

cho <- read.csv("data/sensory/chocolates.csv")
out<-panellipse(cho, col.p = 4, col.j = 1, firstvar = 5)
coltable(out$hotelling, main.title = "P-values for the Hotelling's T2 tests")











if(FALSE){
  .PC <-princomp(adjmean)
  dev.new();biplot(.PC,cex=0.8) 
  abline(h=0,v=0,lty=3,col=("blue"))
  
  dev.new();plot(.PC, col=c("lightblue"))
  dev.new();screeplot(.PC, col=c("lightblue"))
  
  
  summary(.PC, loadings=TRUE)
  names(.PC)
  .PC$sdev
  
  .PC$scores 
  predict(.PC) #Both of them are equal. 
  
  loadings(.PC)
  round(unclass(loadings(.PC))[,1:2],4)
  PC1=as.matrix(experts[,5:ncol(experts)]) %*% as.matrix(unclass(loadings(.PC))[,1])
  
}



if (FALSE) {
  Attributes=names(experts)[5:ncol(experts)]
  Vtest=NULL
  for (i in Attributes) {
    res=summary(aov(experts[,i]~Product+Panelist, 
                    data = experts, 
                    na.action = na.exclude))[[1]]
    
    #Normal approximation to F distribution
    Vtest.pvalue=pf(res[1, 4], res[1, 1], res[nrow(res), 1], lower.tail = FALSE)
    Vtest.stat=abs(qnorm(Vtest.pvalue))
    Vtest=rbind(Vtest,cbind(Vtest.stat,Vtest.pvalue))
    
  }
  colnames(Vtest)=c("Vtest","P-value")
  rownames(Vtest)=Attributes
  
  Vtest[order(Vtest[,1], decreasing = TRUE),]
  
  out.decat <- decat(experts,
                     formul="~Product+Panelist",
                     firstvar=5,
                     lastvar=ncol(experts),
                     graph=F)
  out.decat$adjmean
  names(out.decat)
  out.decat$tabF
  out.decat$resF
  
}